####################################
# Code to create replicate         #
# multiple imputation for:         #
#                                  #
# "From Economic Competition to    #
# Military Combat: Export          #
# Similarity and International     #
# Conflict" (main text)            #
#                                  #
# J. Tyson Chatagnier and Kerim    #
# Can Kavakli                      #
#                                  #
# Published in the Journal of      #
# Conflict Resolution              #
####################################

rm(list=ls(all=TRUE))

library(foreign)
library(Amelia)
library(DataCombine)

sim.data <- read.dta("export_similarity_data.dta")

sim.data$dyadid <- sim.data$ccode1 + 0.001 * sim.data$ccode2


##############################
# Create lagged versions     #
# of some variables, as well #
# as dummies for decades.    #
##############################

sim.data$l.pwin <- slide(data.frame(var    = sim.data$pwin_cap,
                                    dyadid = sim.data$dyadid,
                                    year   = sim.data$year
                                    ),
                         Var      = "var",
                         GroupVar = "dyadid",
                         slideBy  = -1
                         )[, 4]
                        # Use the slide() function from the DataCombine
                        # package to lag by one unit. The fourth column
                        # of the resulting matrix is the lagged vector.

sim.data$l.s <- slide(data.frame(var    = sim.data$s3un,
                                 dyadid = sim.data$dyadid,
                                 year   = sim.data$year
                                 ),
                      Var      = "var",
                      GroupVar = "dyadid",
                      slideBy  = -1
                      )[, 4]


sim.data$sixties <- ifelse(sim.data$year < 1970,
                           1,
                           0
                           )

sim.data$seventies <- ifelse(sim.data$year >= 1970 & sim.data$year < 1980,
                             1,
                             0
                             )

sim.data$eighties <- ifelse(sim.data$year >= 1980 & sim.data$year < 1990,
                            1,
                            0
                            )

sim.data$nineties <- ifelse(sim.data$year >= 1990,
                            1,
                            0
                            )
                        # We are combining 2000 into the 90s.

sim.data$l.sim <- slide(data.frame(var    = sim.data$exports,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.dep <- slide(data.frame(var    = sim.data$lowerdep2015,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.riv <- slide(data.frame(var    = sim.data$rivalry,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.jointdem <- slide(data.frame(var    = sim.data$jointdem,
                                        dyadid = sim.data$dyadid,
                                        year   = sim.data$year
                                        ),
                             Var      = "var",
                             GroupVar = "dyadid",
                             slideBy  = -1
                             )[, 4]


#################################################
# Multiple Imputation: Countries from 1962-2000 #
#################################################

rel.data <- data.frame(contig    = sim.data$contig_dummy,
                       lndist    = sim.data$lndist,
                       caprat    = sim.data$l.pwin,
                       jointdem  = sim.data$l.jointdem,
                       l.s       = sim.data$l.s,
                       majmaj    = sim.data$majmaj,
                       minmaj    = sim.data$majmin,
                       rivals    = sim.data$l.riv,
                       europe    = sim.data$both_europe,
                       sixties   = sim.data$sixties,
                       seventies = sim.data$seventies,
                       eighties  = sim.data$eighties,
                       trade     = sim.data$l.dep,
                       sim       = sim.data$l.sim,
                       py        = sim.data$mid_py,
                       py.host   = sim.data$hihost_py,
                       py.fatal  = sim.data$fatal_py,
                       mid       = sim.data$mid_init,
                       host      = sim.data$hihost_init,
                       fatal     = sim.data$fatal_init,
                       polrel    = sim.data$polrel,
                       ongoing   = sim.data$ongoing,
                       year      = sim.data$year,
                       dyadid    = sim.data$dyadid
                       )

b.mids.pr <- NULL
b.fatal.pr <- NULL
b.hihost.pr <- NULL
b.mids.all <- NULL
b.fatal.all <- NULL
b.hihost.all <- NULL
b.mids.nomp <- NULL
b.fatal.nomp <- NULL
b.hihost.nomp <- NULL
                        # Initialize coefficient objects

se.mids.pr <- NULL
se.fatal.pr <- NULL
se.hihost.pr <- NULL
se.mids.all <- NULL
se.fatal.all <- NULL
se.hihost.all <- NULL
se.mids.nomp <- NULL
se.fatal.nomp <- NULL
se.hihost.nomp <- NULL
                        # Initialize standard error objects

###############################
# Ensure we don't extrapolate #
###############################

dyads <- unique(rel.data$dyadid)

data.list <- vector("list",
                    length = length(dyads)
                    )

for (i in 1 : length(dyads)) {
  data.sub <- rel.data[rel.data$dyadid == dyads[i], ]
  if (any(!is.na(data.sub$sim))) {
    min.yr <- min(data.sub$year[!is.na(data.sub$sim)])
    max.yr <- max(data.sub$year[!is.na(data.sub$sim)])
    data.sub <- data.sub[data.sub$year >= min.yr &
                         data.sub$year <= max.yr,
                         ]
  } else {
    data.sub <- NULL
  }
  data.list[[i]] <- data.sub
}
                        # Ensure that we only use imputation
                        # to interpolate. Remove any observations
                        # before the first or after the last year
                        # for which we have data for each country.

rel.data <- do.call("rbind",
                    data.list
                    )

######################
# Run the imputation #
######################

set.seed(2015)

data.imp <- amelia(rel.data,
                   ts       = "year",
                   cs       = "dyadid",
                   polytime = 3,
                   p2s      = 2,
                   noms     = c("contig",
                                "jointdem",
                                "majmaj",
                                "minmaj",
                                "europe",
                                "sixties",
                                "seventies",
                                "eighties",
                                "rivals",
                                "mid",
                                "host",
                                "fatal",
                                "polrel"
                                ),
                   ords     = c("py",
                                "py.fatal",
                                "py.host"
                                )
                   )

data.imp <- transform(data.imp, 
                      py2 = py ^ 2
                      )
data.imp <- transform(data.imp, 
                      py3 = py ^ 3
                      )
data.imp <- transform(data.imp, 
                      py2.fatal = py.fatal ^ 2
                      )
data.imp <- transform(data.imp, 
                      py3.fatal = py.fatal ^ 3
                      )
data.imp <- transform(data.imp, 
                      py2.host = py.host ^ 2
                      )
data.imp <- transform(data.imp, 
                      py3.host = py.host ^ 3
                      )

for (i in 1 : data.imp$m) {

  data.set <- data.imp$imputations[[i]]

  x <- as.matrix(data.set[, 1 : 14])
  py.mid <- as.matrix(data.set[, c(15, 24, 25)])
  py.fatal <- as.matrix(data.set[, c(16, 26, 27)])
  py.host <- as.matrix(data.set[, c(17, 28, 29)])

  mod.mids.all <- glm(data.set$mid ~ x + py.mid,
                      subset = data.set$ongoing == 0,
                      family = binomial(link = "logit")
                      )


  mod.fatal.all <- glm(data.set$fatal ~ x + py.fatal,
                       subset = data.set$ongoing == 0,
                       family = binomial(link = "logit")
                       )


  mod.hihost.all <- glm(data.set$host ~ x + py.host,
                        subset = data.set$ongoing == 0,
                        family = binomial(link = "logit")
                        )

  data.set <- data.set[data.set$polrel == 1, ]

  x <- as.matrix(data.set[, 1 : 14])
  py.mid <- as.matrix(data.set[, c(15, 24, 25)])
  py.fatal <- as.matrix(data.set[, c(16, 26, 27)])
  py.host <- as.matrix(data.set[, c(17, 28, 29)])

  mod.mids.pr <- glm(data.set$mid ~ x + py.mid,
                     subset = data.set$ongoing == 0,
                     family = binomial(link = "logit")
                     )


  mod.fatal.pr <- glm(data.set$fatal ~ x + py.fatal,
                      subset = data.set$ongoing == 0,
                      family = binomial(link = "logit")
                      )


  mod.hihost.pr <- glm(data.set$host ~ x + py.host,
                       subset = data.set$ongoing == 0,
                       family = binomial(link = "logit")
                       )


  data.set <- data.set[data.set$majmaj == 0, ]

  x <- as.matrix(data.set[, c(1 : 5, 7 : 14)])
  py.mid <- as.matrix(data.set[, c(15, 24, 25)])
  py.fatal <- as.matrix(data.set[, c(16, 26, 27)])
  py.host <- as.matrix(data.set[, c(17, 28, 29)])


  mod.mids.nomp <- glm(data.set$mid ~ x + py.mid,
                       subset = data.set$ongoing == 0,
                       family = binomial(link = "logit")
                       )


  mod.fatal.nomp <- glm(data.set$fatal ~ x + py.fatal,
                        subset = data.set$ongoing == 0,
                        family = binomial(link = "logit")
                        )


  mod.hihost.nomp <- glm(data.set$host ~ x + py.host,
                         subset = data.set$ongoing == 0,
                         family = binomial(link = "logit")
                         )


  b.mids.pr <- rbind(b.mids.pr, 
                     mod.mids.pr$coefficients
                     )
  b.fatal.pr <- rbind(b.fatal.pr, 
                      mod.fatal.pr$coefficients
                      )
  b.hihost.pr <- rbind(b.hihost.pr, 
                       mod.hihost.pr$coefficients
                       )
  b.mids.all <- rbind(b.mids.all, 
                      mod.mids.all$coefficients
                      )
  b.fatal.all <- rbind(b.fatal.all, 
                       mod.fatal.all$coefficients
                       )
  b.hihost.all <- rbind(b.hihost.all, 
                        mod.hihost.all$coefficients
                        )
  b.mids.nomp <- rbind(b.mids.nomp, 
                       mod.mids.nomp$coefficients
                       )
  b.fatal.nomp <- rbind(b.fatal.nomp, 
                        mod.fatal.nomp$coefficients
                        )
  b.hihost.nomp <- rbind(b.hihost.nomp, 
                         mod.hihost.nomp$coefficients
                         )

  se.mids.pr <- rbind(se.mids.pr, 
                      sqrt(diag(vcov(mod.mids.pr)))
                      )
  se.fatal.pr <- rbind(se.fatal.pr, 
                       sqrt(diag(vcov(mod.fatal.pr)))
                       )
  se.hihost.pr <- rbind(se.hihost.pr, 
                        sqrt(diag(vcov(mod.hihost.pr)))
                        )
  se.mids.all <- rbind(se.mids.all, 
                       sqrt(diag(vcov(mod.mids.all)))
                       )
  se.fatal.all <- rbind(se.fatal.all, 
                        sqrt(diag(vcov(mod.fatal.all)))
                        )
  se.hihost.all <- rbind(se.hihost.all, 
                         sqrt(diag(vcov(mod.hihost.all)))
                         )
  se.mids.nomp <- rbind(se.mids.nomp, 
                        sqrt(diag(vcov(mod.mids.nomp)))
                        )
  se.fatal.nomp <- rbind(se.fatal.nomp, 
                         sqrt(diag(vcov(mod.fatal.nomp)))
                         )
  se.hihost.nomp <- rbind(se.hihost.nomp, 
                          sqrt(diag(vcov(mod.hihost.nomp)))
                          )
                        # We use asymptotic, rather than clustered standard
                        # errors here, since we're using fixed effects for
                        # what would be the clusters.
}

combined.mids.pr <- mi.meld(q  = b.mids.pr, 
                            se = se.mids.pr
                            )
combined.fatal.pr <- mi.meld(q  = b.fatal.pr, 
                             se = se.fatal.pr
                             )
combined.hihost.pr <- mi.meld(q  = b.hihost.pr, 
                              se = se.hihost.pr
                              )
combined.mids.all <- mi.meld(q  = b.mids.all, 
                             se = se.mids.all
                             )
combined.fatal.all <- mi.meld(q  = b.fatal.all, 
                              se = se.fatal.all
                              )
combined.hihost.all <- mi.meld(q  = b.hihost.all, 
                               se = se.hihost.all
                               )
combined.mids.nomp <- mi.meld(q  = b.mids.nomp, 
                              se = se.mids.nomp
                              )
combined.fatal.nomp <- mi.meld(q  = b.fatal.nomp, 
                               se = se.fatal.nomp
                               )
combined.hihost.nomp <- mi.meld(q  = b.hihost.nomp, 
                                se = se.hihost.nomp
                                )
                        # These objects provide the coefficients and standard
                        # errors corresponding respectively to the nine
                        # columns in Table 12 of the Appendix.

varnames <- c("Constant",
              "Contiguity",    
              "Distance (logged)",
              "Capability Ratio",
              "Joint Democracy",
              "UN Voting",
              "Both Major Powers",
              "Major and Minor Power",
              "Both European",
              "1960s",
              "1970s",
              "1980s",
              "Trade Dependence",
              "Rivals",
              "Export Similarity",
              "t",
              "t$^{2}$",
              "t$^{3}$"
              )

colnames(combined.mids.all$q.mi) <- varnames
colnames(combined.fatal.all$q.mi) <- varnames
colnames(combined.hihost.all$q.mi) <- varnames
colnames(combined.mids.pr$q.mi) <- varnames
colnames(combined.fatal.pr$q.mi) <- varnames
colnames(combined.hihost.pr$q.mi) <- varnames
colnames(combined.mids.nomp$q.mi) <- varnames[-7]
colnames(combined.fatal.nomp$q.mi) <- varnames[-7]
colnames(combined.hihost.nomp$q.mi) <- varnames[-7]

colnames(combined.mids.all$se.mi) <- varnames
colnames(combined.fatal.all$se.mi) <- varnames
colnames(combined.hihost.all$se.mi) <- varnames
colnames(combined.mids.pr$se.mi) <- varnames
colnames(combined.fatal.pr$se.mi) <- varnames
colnames(combined.hihost.pr$se.mi) <- varnames
colnames(combined.mids.nomp$se.mi) <- varnames[-7]
colnames(combined.fatal.nomp$se.mi) <- varnames[-7]
colnames(combined.hihost.nomp$se.mi) <- varnames[-7]
                        # Provide names for each of the coefficients
                        # and standard errors, corresponding to the table.
                        # the objects can be called and compared to Table 12.
